## Allee and Peinhardt 2011 IO

library(foreign) 
library(Amelia)

## Read the original Stata replication file and take a first look at it
ap2011 <- read.dta("AP2011 IO Rep Data.dta")
head(ap2011)
dim(ap2011)

## Remove the ID variables one at a time
## (assigning NULL to a data-frame column deletes that column)
ap2011$wbclass <- NULL
ap2011$ccode <- NULL
ap2011$Country_Code <- NULL
ap2011$country <- NULL

## Re-check the dimensions. Original note: 27 variables remain,
## so no reduction is necessary
dim(ap2011)

## Imputation
## What is the average percentage of missing data per variable?
## Count the missing values in each column of a data frame or matrix.
##
## x: a data frame or matrix.
## Returns an unnamed integer vector holding one NA count per column of x,
## in column order.
NAs <- function(x) {
    ## is.na() yields a logical matrix for either input type, so colSums()
    ## counts the NAs per column directly, without apply() first coercing a
    ## data frame to a (possibly character) matrix.
    ## as.integer() keeps the result an unnamed integer vector.
    as.integer(colSums(is.na(x)))
}
## Missing-value count for every column, printed for inspection
NAs(ap2011)
## Mean share of missing cells across columns, expressed as a percentage
mean(NAs(ap2011) / nrow(ap2011)) * 100

## Thus: 15 imputations

## Fix the RNG state before imputing
## (R reads the literal 02138 as the number 2138)
set.seed(02138)
ap2011.out <- amelia(
    ap2011,
    m = 15,
    ts = "year",
    cs = "ifscode",
    lags = c(
        "lnfdi", "bitcount", "pending",
        "numreg2", "numreg5",
        "losticsid2", "losticsid5",
        "lose_settle2", "lose_settle5"
    ),
    polytime = 3,
    empri = 0.01 * nrow(ap2011)
)

## Export the imputation output in Stata format; separate = FALSE asks for
## one combined file rather than one file per imputation
write.amelia(
    obj = ap2011.out,
    file.stem = "AP2011 IO Imp Data",
    format = "dta",
    separate = FALSE
)